from HTMLParser import HTMLParser
from HTMLParser import HTMLParseError
from pandas import DataFrame

class Para(HTMLParser):
    """HTML parser that captures text for selected tags and counts words.

    For every tag name passed to the constructor, the first chunk of text
    reported after an opening tag of that name is stored in ``tags`` and
    its whitespace-separated words are tallied in ``content``. The first
    chunk of text reported after a ``<title>`` tag is stored in ``name``.

    Attributes:
        keys: Tuple of tag names to capture, as given to the constructor.
        tags: Dict mapping each key (plus ``"title"``) to a list of
            captured text chunks.
        content: ``DataFrame`` with one column per distinct word and a
            single row holding that word's running count. It is empty
            until the first word is counted, and is replaced by a fresh
            frame each time new words are counted.
        name: Text captured for ``<title>``, or ``None`` if none was seen.
        body: True while text for a key tag is awaited.
        this_tag: Name of the key tag whose text is awaited, else ``None``.
        get_title: True while text for ``<title>`` is awaited.
    """

    def __init__(self, *keys):
        """Create a parser that captures text for the tag names in *keys*."""
        # Explicit base-class call: the file targets the Python 2
        # ``HTMLParser`` module, which super() cannot be relied on with.
        HTMLParser.__init__(self)
        self.keys = keys
        self.content = DataFrame()
        self.tags = {}
        for key in self.keys + ("title",):
            self.tags[key] = []

        # Running word -> count tally that backs ``content``.
        self._word_counts = {}

        self.body = False
        self.this_tag = None
        # Both are read by handle_data(), so they must exist even when the
        # document has no <title> tag at all.
        self.get_title = False
        self.name = None

    def handle_starttag(self, tag, attrs):
        """Arm text capture when *tag* is a key tag or ``title``."""
        if tag in self.keys:
            print("get :  %s" % (tag,))  # diagnostic trace
            self.this_tag = tag
            self.body = True
            # NOTE(review): every new opening tag discards text captured
            # for earlier occurrences of the same tag; confirm whether
            # accumulating across occurrences was intended.
            self.tags[tag] = []
        if tag == "title":
            self.get_title = True
            self.tags[tag] = []

    def handle_data(self, data):
        """Store *data* as the title or as the pending key tag's text.

        A pending title takes precedence over a pending key tag. Either
        way the pending state is cleared after one chunk of text.
        """
        print("data")  # diagnostic trace
        if self.get_title:
            self.name = data
            self.get_title = False
        elif self.body:
            print("data %s" % (data,))  # diagnostic trace
            if self.this_tag:
                self.tags[self.this_tag].append(data)
                self.this_tag = None
            self.create_a_data(data)
            self.body = False

    def create_a_data(self, text):
        """Add the whitespace-separated words of *text* to the word counts.

        ``content`` is rebuilt as a one-row ``DataFrame`` whose columns
        are the distinct words seen so far and whose values are their
        counts. Text without any words leaves ``content`` untouched.
        """
        words = text.split()
        if not words:
            return
        counts = self._word_counts
        for word in words:
            # A word's first occurrence counts as 1.
            counts[word] = counts.get(word, 0) + 1
        # One constructor call per text chunk instead of one column
        # insertion per word.
        self.content = DataFrame([counts])

